{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e6c42d98-1acf-4d7c-bd10-5cf0aa9d502f",
   "metadata": {},
   "source": [
    "**Pre-requisites**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70f3a97f-053b-4b99-be86-b2543f817130",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python -m pip install pandas pyarrow"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "76b58c64-3bff-4aae-be3f-7df32be2bca3",
   "metadata": {},
   "source": [
    "**Possible Solution - Retaining An Old Index**"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7aabcf40-8016-4eb0-990b-7880a818cc2d",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "**Note:** This code parses the dates as opposed to leaving them as strings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "834761d7-aaee-4545-b2a8-94efae4c82ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# This version will parse the \"date_of_birth\" column.\n",
    "# These dates will be formatted differently to the tutorial.\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "beach_boys = (\n",
    "    pd.read_csv(\n",
    "        \"band_members.csv\",\n",
    "        parse_dates=[\"date_of_birth\"],\n",
    "        dayfirst=True,\n",
    "    )\n",
    "    .convert_dtypes(dtype_backend=\"pyarrow\")\n",
    "    .assign(\n",
    "        date_of_birth=lambda beach_boys: beach_boys[\"date_of_birth\"].dt.date\n",
    "    )\n",
    ")\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00f37c3d-33a5-4b2a-98ed-9431ef125f89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tutorial version of the code.\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "beach_boys.index = range(1, 10)\n",
    "beach_boys = beach_boys.reset_index(names=\"old_index\")\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6bcea77-6b70-4086-981b-1333a1ddd3b1",
   "metadata": {},
   "source": [
    "**Possible Solution - Using `.index` and row selection**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e1c40ea-0e4b-4b1d-99af-1d68a82ef32b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "beach_boys.index = [x for x in range(1, 20) if x % 2 == 0]\n",
    "\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3909e7e6-224c-4c28-a915-892a0cb014a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.loc[16:18]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b961441a-cc26-4d91-8edc-232f8c4e81bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.iloc[-2:]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c2c333f-aabb-429d-a16c-56b380a3a211",
   "metadata": {},
   "source": [
    "**Possible Solution - Using `.set_axis()`**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12555390-e34b-4b07-ba0d-661452dd52be",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = beach_boys.set_axis(labels=[x**2 for x in range(0, 9)])\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d8c73db-4c2e-4f49-9f0e-bc038cf6f716",
   "metadata": {},
   "source": [
    "**Possible Solutions - Index Restoration**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54a4d206-9f0d-41b2-8604-7be79652c32d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "beach_boys = beach_boys.drop(labels=[3, 5])\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "165e61e6-160f-4a94-9c07-ef4449627c47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# (i) Using .reset_index()\n",
    "beach_boys = beach_boys.reset_index(drop=True)\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "287eb5f2-3f58-43fc-beef-42ff89c390df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# (ii) Using .index\n",
    "beach_boys.index = range(len(beach_boys))\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "351344f2-2db6-4054-9751-63436bc6ee67",
   "metadata": {},
   "source": [
    "**Possible Solution - Dealing With Duplicates**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0195d6d3-01ed-4f3e-b374-646be970d1ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n",
    "    dtype_backend=\"pyarrow\"\n",
    ")\n",
    "\n",
    "guitar_players = beach_boys.query(\"instrument == 'Guitar'\").reset_index(\n",
    "    drop=True\n",
    ")\n",
    "\n",
    "others = beach_boys.query(\"instrument != 'Guitar'\").reset_index(drop=True)\n",
    "\n",
    "all_beach_boys = pd.concat([guitar_players, others]).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "443301b0-b2b2-4e98-ac30-87cd5247b497",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.loc[[3]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6018960f-963a-4fec-9e92-6e96197e5b7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.iloc[[3]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0f664b0-5903-490e-9714-a4c4dda2681a",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_beach_boys.filter(items=[1, 3], axis=\"index\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63c18951-baa5-4244-b053-e104c7ddbeaa",
   "metadata": {},
   "source": [
    "**Possible Solution - Customising Existing Columns**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "969da9c7-30ca-4ae6-8d7a-925f19369984",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "beach_boys = pd.read_csv(\n",
    "    \"band_members.csv\",\n",
    ").convert_dtypes(dtype_backend=\"pyarrow\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0de567e-5d9b-4852-af9d-d8cff07f4f5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "beach_boys.index = beach_boys[\"last_name\"].str.cat(\n",
    "    beach_boys[\"first_name\"].str[0]\n",
    ")\n",
    "\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f62038bd-fa3e-4c51-bf62-09d32f30c719",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative, with apply(). Will be slow for big DataFrames\n",
    "def calculate_user_ID(row):\n",
    "    return f\"{row['last_name']}{row['first_name'][0]}\"\n",
    "\n",
    "\n",
    "beach_boys.index = beach_boys.apply(calculate_user_ID, axis=1)\n",
    "\n",
    "beach_boys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b480ab24-5dfe-4892-aa4d-ea07b74e9bd5",
   "metadata": {},
   "source": [
    "**Possible Solution - Index Alignment (1)**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d37b244-13a7-42f3-9ad1-462ff59f23e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales = pd.read_csv(\"week1_record_sales.csv\").set_index(\"index\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ecd91233-8bdf-45fa-87a9-700f6056b750",
   "metadata": {},
   "outputs": [],
   "source": [
    "week2_sales = pd.read_csv(\"week2_record_sales.csv\").set_index(\"index\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db6bec66-fea7-41b3-a874-30c30b15d54c",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b76f6afe-0c08-4653-a40f-d82194cc2dc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "week2_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fba8c2c-d90d-4158-9712-cfe0fde11140",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.index = week2_sales.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16341960-0d3b-4aea-b650-a20c58673c2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e378f632-93c7-45fc-bb6b-3fa0ef54cf4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "week2_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1c9a420-3e1c-4eb9-ae87-eaba0079bc24",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2dfdfbe-5d03-4a60-a86f-5cdb24ac22e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales.merge(week2_sales, left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eda12215-7c8e-44e2-a110-67934f06a2d4",
   "metadata": {},
   "source": [
    "**Possible Solution - Index Alignment (2)**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c5b10d4-02e1-493a-8a4c-e87989998f3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "week1_sales = week1_sales.set_index(\"day\")\n",
    "week2_sales = week2_sales.set_index(\"day\")\n",
    "\n",
    "week1_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ba444f6-5581-4ad5-8bca-6e647563c4b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "week2_sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c792ee8-eb75-4be4-afff-f4f844c55129",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9092f593-8067-4b8b-8582-6702a09699d2",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "week1_sales.merge(week2_sales, left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b4b6511d-ef2f-4e64-9359-325f6228e3a6",
   "metadata": {},
   "source": [
    "**Possible Solution - Dealing With A `MultiIndex`**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45c281f1-9c97-447d-894e-bd33196dbad7",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals = pd.read_csv(\"cereals.csv\").convert_dtypes(dtype_backend=\"pyarrow\")\n",
    "\n",
    "cereals = cereals.pivot_table(\n",
    "    values=\"fiber\",\n",
    "    index=[\"type\", \"manufacturer\"],\n",
    "    aggfunc=\"mean\",\n",
    ").reset_index(level=1)\n",
    "\n",
    "cereals"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "52bd7cce-e4c5-4b11-b0b7-4e7b7f1d5cdc",
   "metadata": {},
   "source": [
    "**Possible Solution - Creating A Meaningful Index**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0af37ac-c7d7-4c92-a22a-da74f645d622",
   "metadata": {},
   "outputs": [],
   "source": [
    "cereals = pd.read_csv(\"cereals.csv\").convert_dtypes(dtype_backend=\"pyarrow\")\n",
    "\n",
    "cereals = cereals.pivot_table(\n",
    "    values=\"fiber\", index=[\"manufacturer\", \"type\"], aggfunc=\"mean\"\n",
    ")\n",
    "\n",
    "cereals.index = cereals.index.to_flat_index()\n",
    "cereals"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
